Gráfico¶

In [ ]:
library("fredr")
library("tidyverse")
library("scales")
library("vroom")
library("ggplot2")
library("dplyr")
library("scales")
library("zoo")
library("gapminder")
library("gganimate")
library("utf8")
library("gridExtra")
library("socviz")
library("ggrepel")
Sys.setlocale("LC_ALL", "pt_br.utf-8")
library("lubridate")
library("readr")
library("ggthemes")
library("maps")
library("ggpomological")
library("ggthemr")
library("extrafont")
library("stringr")
'pt_br.utf-8/pt_br.utf-8/pt_br.utf-8/C/pt_br.utf-8/C'

theme_solarized()
theme_pomological()
scale_colour_solarized()
theme_wsj()

In [ ]:
# Note:
# the size of the rendered plots is changed as follows
# (presumably read by the notebook's R kernel when drawing figures; confirm):
options(repr.plot.width=15, repr.plot.height=8)
In [ ]:
# font_import()
In [ ]:
# Load the raw results table from disk and show its column layout.
runners_data <- read.csv(file = "Runners.csv")
glimpse(runners_data)
Rows: 18,244
Columns: 10
$ Rank          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 15, 16, 17,…
$ Time          <chr> "00:01:40.910000", "00:01:41.010000", "00:01:41.090000",…
$ Name          <chr> "David Rudisha", "David Rudisha", "David Rudisha", "Wils…
$ Country       <chr> "KEN", "KEN", "KEN", "DEN", "DEN", "KEN", "KEN", "KEN", …
$ Date.of.Birth <chr> "1988-12-17", "1988-12-17", "1988-12-17", "1970-12-12", …
$ Place         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,…
$ City          <chr> "London", "Rieti", "Berlin", "Köln", "Zürich", "Rieti", …
$ Date          <chr> "2012-09-08", "2010-08-29", "2010-08-22", "1997-08-24", …
$ Gender        <chr> "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", …
$ Event         <chr> "800 m", "800 m", "800 m", "800 m", "800 m", "800 m", "8…
In [ ]:
# Convert the textual race date (year-month-day) into a date-time column,
# then preview the first rows.
runners_data[["Date"]] <- as.POSIXct(
  x = runners_data[["Date"]],
  format = "%Y-%m-%d"
)
head(runners_data)
A data.frame: 6 x 10
RankTimeNameCountryDate.of.BirthPlaceCityDateGenderEvent
<int><chr><chr><chr><chr><int><chr><dttm><chr><chr>
1100:01:40.910000David Rudisha KEN1988-12-171London2012-09-08Men800 m
2200:01:41.010000David Rudisha KEN1988-12-171Rieti 2010-08-29Men800 m
3300:01:41.090000David Rudisha KEN1988-12-171Berlin2010-08-22Men800 m
4400:01:41.110000Wilson KipketerDEN1970-12-121Köln 1997-08-24Men800 m
5500:01:41.240000Wilson KipketerDEN1970-12-121Zürich1997-08-13Men800 m
6600:01:41.330000David Rudisha KEN1988-12-171Rieti 2011-10-09Men800 m
In [ ]:
# Add an `Age` column: the race's calendar year minus the birth year.
# This is a calendar-year difference, so it does not check whether the
# birthday had already happened on race day.
# Columns are referenced through mutate()'s data mask rather than through
# `runners_data$...`, so the expression always follows the piped data.
runners_data <- runners_data |>
  mutate(Age = year(Date) - year(Date.of.Birth))
glimpse(runners_data)
Rows: 18,244
Columns: 11
$ Rank          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 15, 16, 17,…
$ Time          <chr> "00:01:40.910000", "00:01:41.010000", "00:01:41.090000",…
$ Name          <chr> "David Rudisha", "David Rudisha", "David Rudisha", "Wils…
$ Country       <chr> "KEN", "KEN", "KEN", "DEN", "DEN", "KEN", "KEN", "KEN", …
$ Date.of.Birth <chr> "1988-12-17", "1988-12-17", "1988-12-17", "1970-12-12", …
$ Place         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,…
$ City          <chr> "London", "Rieti", "Berlin", "Köln", "Zürich", "Rieti", …
$ Date          <dttm> 2012-09-08, 2010-08-29, 2010-08-22, 1997-08-24, 1997-08…
$ Gender        <chr> "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", …
$ Event         <chr> "800 m", "800 m", "800 m", "800 m", "800 m", "800 m", "8…
$ Age           <dbl> 24, 22, 22, 27, 27, 23, 22, 24, 25, 27, 18, 24, 21, 26, …
In [ ]:
# Named colour vectors passed to scale_*_manual(values = ...) in the plots below.
# Names must match the values of the mapped column ("Men"/"Women" for Gender).
color_per_gender <- c("Men" = "#2986cc", "Women" = "#d5a6bd")
# Names match the medal column names used as the `Medal` key after pivot_longer().
color_per_medal <- c("Gold_medals" = "#D6AF36", "Silver_medals"= "#A7A7AD", "Bronze_medals" = "#A77044")

Performance entre sexo¶

Vamos começar analisando a performance entre homens e mulheres.

In [ ]:
# Derive the race year once on the full table, then split it by gender so
# that all three tables carry the same trailing `Year` column.
runners_data$Year <- year(runners_data$Date)

runners_men <- filter(runners_data, Gender == "Men")
runners_women <- filter(runners_data, Gender == "Women")
In [ ]:
glimpse(runners_data)
Rows: 18,244
Columns: 12
$ Rank          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 15, 16, 17,…
$ Time          <chr> "00:01:40.910000", "00:01:41.010000", "00:01:41.090000",…
$ Name          <chr> "David Rudisha", "David Rudisha", "David Rudisha", "Wils…
$ Country       <chr> "KEN", "KEN", "KEN", "DEN", "DEN", "KEN", "KEN", "KEN", …
$ Date.of.Birth <chr> "1988-12-17", "1988-12-17", "1988-12-17", "1970-12-12", …
$ Place         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,…
$ City          <chr> "London", "Rieti", "Berlin", "Köln", "Zürich", "Rieti", …
$ Date          <dttm> 2012-09-08, 2010-08-29, 2010-08-22, 1997-08-24, 1997-08…
$ Gender        <chr> "Men", "Men", "Men", "Men", "Men", "Men", "Men", "Men", …
$ Event         <chr> "800 m", "800 m", "800 m", "800 m", "800 m", "800 m", "8…
$ Age           <dbl> 24, 22, 22, 27, 27, 23, 22, 24, 25, 27, 18, 24, 21, 26, …
$ Year          <dbl> 2012, 2010, 2010, 1997, 1997, 2011, 2010, 2012, 1981, 19…
In [ ]:
# Per-year tally for men (years up to and including 2016): number of rows
# with Place 1, 2 and 3, plus their sum.
performance_runners_men <- runners_men |>
  filter(Year <= 2016) |>
  group_by(Year) |>
  summarise(
    Gold_medals = sum(Place == 1, na.rm = TRUE),
    Silver_medals = sum(Place == 2, na.rm = TRUE),
    Bronze_medals = sum(Place == 3, na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
  )
glimpse(performance_runners_men)
Rows: 52
Columns: 5
$ Year          <dbl> 1962, 1965, 1966, 1967, 1968, 1969, 1971, 1972, 1973, 19…
$ Gold_medals   <int> 1, 1, 1, 2, 7, 1, 2, 5, 3, 10, 7, 7, 16, 14, 14, 15, 18,…
$ Silver_medals <int> 0, 0, 0, 0, 4, 0, 0, 2, 0, 1, 0, 3, 4, 2, 0, 3, 1, 8, 7,…
$ Bronze_medals <int> 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 2, 4,…
$ Total         <int> 1, 1, 1, 2, 14, 1, 2, 7, 3, 12, 7, 11, 21, 17, 14, 19, 1…
In [ ]:
# Per-year tally for women (years up to and including 2016): number of rows
# with Place 1, 2 and 3, plus their sum.
performance_runners_women <- runners_women |>
  filter(Year <= 2016) |>
  group_by(Year) |>
  summarise(
    Gold_medals = sum(Place == 1, na.rm = TRUE),
    Silver_medals = sum(Place == 2, na.rm = TRUE),
    Bronze_medals = sum(Place == 3, na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
  )
glimpse(performance_runners_women)
Rows: 45
Columns: 5
$ Year          <dbl> 1964, 1972, 1973, 1974, 1976, 1977, 1978, 1979, 1980, 19…
$ Gold_medals   <int> 1, 2, 3, 8, 28, 12, 22, 47, 59, 49, 59, 62, 119, 78, 90,…
$ Silver_medals <int> 0, 1, 0, 0, 13, 2, 8, 13, 27, 15, 18, 26, 54, 33, 30, 30…
$ Bronze_medals <int> 0, 0, 0, 0, 7, 0, 5, 7, 16, 8, 9, 12, 25, 11, 8, 14, 26,…
$ Total         <int> 1, 3, 3, 8, 48, 14, 35, 67, 102, 72, 86, 100, 198, 122, …
In [ ]:
# Tag each yearly tally with its gender and stack them into one table.
performance_runners_men <- mutate(performance_runners_men, Gender = "Men")
performance_runners_women <- mutate(performance_runners_women, Gender = "Women")
combined_data_performance_runners <- rbind(
  performance_runners_men,
  performance_runners_women
)

subtitle_text <- "Diferença entre a quantidade de medalhas de homens e mulheres ao passar do tempo"
wrapped_subtitle <- subtitle_text |> str_wrap(width = 73)

# Faint raw yearly totals with a smoothed trend on top, one colour per gender.
ggplot(
  data = combined_data_performance_runners,
  mapping = aes(x = Year, y = Total, color = Gender)
) +
  geom_line(alpha = 0.4) +
  geom_smooth(se = FALSE) +
  scale_x_continuous(breaks = seq(1960, 2015, 10)) +
  scale_color_manual(values = color_per_gender) +
  labs(title = "Performance dos corredores", subtitle = wrapped_subtitle) +
  theme_wsj(color = "gray") +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "bottom")
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
In [ ]:
subtitle_text <- "Separando o desempenho dos competidores em categorias de medalhas conquistadas,ouro,prata e bronze, respectivamente"
wrapped_subtitle <- str_wrap(subtitle_text, width = 120)

# Yearly first-place counts per gender. This is the top panel of the stacked
# medal figure, so it carries the shared title/subtitle and hides its legend.
gold_medal_plot <- ggplot(
  data = combined_data_performance_runners,
  mapping = aes(x = Year, y = Gold_medals, color = Gender)
) +
  geom_line() +
  theme_wsj(color = "gray") +
  scale_color_manual(values = color_per_gender) +
  labs(
    title = "Medalhas entre homens e mulheres",
    subtitle = wrapped_subtitle
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 15),
    plot.subtitle = element_text(size = 15)
  ) +
  # `guides(color = FALSE)` is deprecated; "none" is the supported spelling.
  guides(color = "none")
In [ ]:
# Yearly second-place counts per gender (middle panel; legend hidden).
silver_medal_plot <- ggplot(
  data = combined_data_performance_runners,
  mapping = aes(x = Year, y = Silver_medals, color = Gender)
) +
  geom_line() +
  theme_wsj(color = "gray") +
  # `guides(color = FALSE)` is deprecated; "none" is the supported spelling.
  guides(color = "none") +
  scale_color_manual(values = color_per_gender) +
  labs(y = "medalhas de prata")
In [ ]:
# Yearly third-place counts per gender (bottom panel; carries the legend).
# The y aesthetic must be Bronze_medals: the previous version plotted
# Silver_medals again under the bronze label.
bronze_medal_plot <- ggplot(
  data = combined_data_performance_runners,
  mapping = aes(x = Year, y = Bronze_medals, color = Gender)
) +
  geom_line() +
  theme_wsj(color = "gray") +
  scale_color_manual(values = color_per_gender) +
  labs(y = "medalhas de bronze") +
  theme(legend.position = "bottom")
In [ ]:
grid.arrange(gold_medal_plot,silver_medal_plot,bronze_medal_plot)
In [ ]:
head(combined_data_performance_runners)
A tibble: 6 x 6
YearGold_medalsSilver_medalsBronze_medalsTotalGender
<dbl><int><int><int><int><chr>
1962100 1Men
1965100 1Men
1966100 1Men
1967200 2Men
196874314Men
1969100 1Men
In [ ]:
# Shared subtitle for the medal histograms, wrapped at 120 characters.
subtitle_text <- "Separando o desempenho dos competidores em categorias de medalhas conquistadas,ouro,prata e bronze, respectivamente"
wrapped_subtitle <- subtitle_text |>
  str_wrap(width = 120)
In [ ]:
# Histograms of the yearly medal counts, filled by gender, one panel per
# medal type. Only the bottom panel shows the legend.
# `guides(fill = FALSE)` is deprecated, so the legend is hidden with "none".
# The colour scale was dropped because no layer maps a colour aesthetic.
gold_medal_plot <- ggplot(
  data = combined_data_performance_runners,
  mapping = aes(x = Gold_medals, fill = Gender)
) +
  geom_histogram(bins = 20) +
  theme_wsj(color = "gray") +
  scale_fill_manual(values = color_per_gender) +
  guides(fill = "none") +
  labs(title = "Quantidade de medalhas", subtitle = wrapped_subtitle) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(size = 15)
  )

silver_medal_plot <- ggplot(
  data = combined_data_performance_runners,
  mapping = aes(x = Silver_medals, fill = Gender)
) +
  geom_histogram(bins = 20) +
  theme_wsj(color = "gray") +
  guides(fill = "none") +
  scale_fill_manual(values = color_per_gender)

bronze_medal_plot <- ggplot(
  data = combined_data_performance_runners,
  mapping = aes(x = Bronze_medals, fill = Gender)
) +
  geom_histogram(bins = 20) +
  theme_wsj(color = "gray") +
  scale_fill_manual(values = color_per_gender) +
  theme(legend.position = "bottom")

grid.arrange(gold_medal_plot, silver_medal_plot, bronze_medal_plot)

Performance por país¶

In [ ]:
# Number of top-three finishes per country, most successful first.
performance_country <- runners_data |>
  filter(Place %in% 1:3) |>
  count(Country, sort = TRUE)

# Codes of the nine countries with the most top-three finishes, as a list.
performance_country_filtered <- performance_country$Country |>
  head(9) |>
  as.list()
In [ ]:
# Top-three finishes per country and year (up to 2016), largest counts first.
# The result stays grouped by Country and Year.
countries_performance_per_year <- runners_data |>
  filter(Place %in% 1:3, Year <= 2016) |>
  group_by(Country, Year) |>
  count(Country, sort = TRUE, name = "total")

# Keep only the countries selected in `performance_country_filtered`.
top_countries_performance_per_year <- filter(
  countries_performance_per_year,
  Country %in% performance_country_filtered
)
In [ ]:
subtitle_text <- "Diferença entre a quantidade de medalhas dos países ao passar do tempo"
wrapped_subtitle <- str_wrap(subtitle_text, width = 73)

# One small panel per top country showing its yearly top-three finishes.
# Changes from the previous version:
#   * the subtitle prepared above is now attached to the plot,
#   * the stray trailing comma inside theme() was removed,
#   * the misspelt word in the title was corrected.
ggplot(
  data = top_countries_performance_per_year,
  mapping = aes(x = Year, y = total)
) +
  geom_line(mapping = aes(group = Country)) +
  facet_wrap(~Country, ncol = 3) +
  theme_economist_white() +
  labs(
    title = "Quantidade de medalhas conquistadas por País",
    subtitle = wrapped_subtitle,
    x = NULL,
    y = NULL
  ) +
  theme(plot.title = element_text(hjust = 0.5))
In [ ]:
# Top-three finishes per country, gender and year (up to 2016), largest
# counts first. The result stays grouped by Country, Gender and Year.
performance_country_by_sex <- runners_data |>
  filter(Place %in% 1:3, Year <= 2016) |>
  group_by(Country, Gender, Year) |>
  count(Gender, sort = TRUE, name = "total")

# Keep only the countries selected in `performance_country_filtered`.
top_performance_country_by_sex <- filter(
  performance_country_by_sex,
  Country %in% performance_country_filtered
)
In [ ]:
# Yearly top-three finishes per gender, one panel per top country.
# Reuses whatever `wrapped_subtitle` currently holds in the session.
ggplot(
  data = top_performance_country_by_sex,
  mapping = aes(x = Year, y = total, color = Gender)
) +
  geom_line() +
  facet_wrap(~Country, ncol = 3) +
  labs(title = "Medalhas entre homens e mulheres", subtitle = wrapped_subtitle) +
  theme_economist_white() +
  theme(
    plot.subtitle = element_text(size = 15),
    plot.title = element_text(hjust = 0.5, size = 15)
  )
In [ ]:
# Per-country tally for men (years up to and including 2016): number of rows
# with Place 1, 2 and 3, plus their sum.
performance_runners_men <- runners_men |>
  filter(Year <= 2016) |>
  group_by(Country) |>
  summarise(
    Gold_medals = sum(Place == 1, na.rm = TRUE),
    Silver_medals = sum(Place == 2, na.rm = TRUE),
    Bronze_medals = sum(Place == 3, na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
  )
In [ ]:
# Keep the selected countries, then reshape to one row per country and medal
# type (`Medal` holds the former column name, `Count` its value).
performance_runners_men <- performance_runners_men |>
  filter(Country %in% performance_country_filtered) |>
  pivot_longer(
    cols = c(Gold_medals, Silver_medals, Bronze_medals),
    names_to = "Medal",
    values_to = "Count"
  ) |>
  as.data.frame()
In [ ]:
subtitle_text <- "Separando o desempenho dos competidores homens em categorias de medalhas conquistadas,ouro,prata e bronze, respectivamente."
wrapped_subtitle <- subtitle_text |> str_wrap(width = 95)

# Horizontal dodged bars: one bar per medal type for each selected country.
ggplot(
  data = performance_runners_men,
  mapping = aes(x = reorder(Country, Count), y = Count, fill = Medal)
) +
  geom_col(position = "dodge2") +
  coord_flip() +
  scale_fill_manual(values = color_per_medal) +
  labs(
    title = "Performance dos países na categoria masculina",
    subtitle = wrapped_subtitle
  ) +
  theme_wsj(color = "gray") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(size = 19)
  )
In [ ]:
# Per-country tally for women (years up to and including 2016): number of
# rows with Place 1, 2 and 3, plus their sum.
performance_runners_women <- runners_women |>
  filter(Year <= 2016) |>
  group_by(Country) |>
  summarise(
    Gold_medals = sum(Place == 1, na.rm = TRUE),
    Silver_medals = sum(Place == 2, na.rm = TRUE),
    Bronze_medals = sum(Place == 3, na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
  )
In [ ]:
# Keep the selected countries, then reshape to one row per country and medal
# type (`Medal` holds the former column name, `Count` its value).
performance_runners_women <- performance_runners_women |>
  filter(Country %in% performance_country_filtered) |>
  pivot_longer(
    cols = c(Gold_medals, Silver_medals, Bronze_medals),
    names_to = "Medal",
    values_to = "Count"
  ) |>
  as.data.frame()
In [ ]:
subtitle_text <- "Separando o desempenho das competidoras mulheres em categorias de medalhas conquistadas,ouro,prata e bronze, respectivamente."
wrapped_subtitle <- subtitle_text |> str_wrap(width = 95)

# Horizontal dodged bars: one bar per medal type for each selected country.
ggplot(
  data = performance_runners_women,
  mapping = aes(x = reorder(Country, Count), y = Count, fill = Medal)
) +
  geom_col(position = "dodge2") +
  coord_flip() +
  scale_fill_manual(values = color_per_medal) +
  labs(
    title = "Performance dos países na categoria feminina",
    subtitle = wrapped_subtitle
  ) +
  theme_wsj(color = "gray") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(size = 19)
  )

Relação entre a idade e velocidade¶

In [ ]:
# Count results per age bracket across all events.
#
# The previous version used overlapping bounds (<= 20, 20..25, 25..30,
# 30..35, >= 35), so ages 20, 25, 30 and 35 were counted in two brackets.
# Each result is now assigned to exactly one bracket. The outer labels are
# honoured literally (20 goes to "Age<=20", 35 to "Age>=35"), and the inner
# boundary ages 25 and 30 go to the lower bracket.
# Rows with a missing Age are left out, as filter() did before.
# The label strings are unchanged because the plots map fill colours to them.
temp_list_1 <- c("Age<=20","20<Age<25","25<Age<30","30<Age<35","Age>=35")

known_ages <- runners_data$Age[!is.na(runners_data$Age)]
age_bracket <- case_when(
  known_ages <= 20 ~ temp_list_1[1],
  known_ages <= 25 ~ temp_list_1[2],
  known_ages <= 30 ~ temp_list_1[3],
  known_ages < 35 ~ temp_list_1[4],
  TRUE ~ temp_list_1[5]
)

# factor() with explicit levels keeps empty brackets as zero counts and
# preserves the bracket order.
temp_list <- as.integer(table(factor(age_bracket, levels = temp_list_1)))

fastest_time <- data.frame(Age = temp_list_1, Total = temp_list)
In [ ]:
# Bar chart of the number of results per age bracket; the manual fill values
# are applied to the brackets in the scale's level order.
ggplot(
  data = fastest_time,
  mapping = aes(x = Age, y = Total, fill = Age)
) +
  geom_col() +
  scale_fill_manual(values = c("gray", "#4F6D7A", "gray", "gray", "gray")) +
  labs(title = "Idade média dos corredores") +
  theme_wsj(color = "gray") +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
In [ ]:
# Age profile of the 100 m results: mean age plus counts per age bracket.
#
# The previous version used overlapping bounds (<= 20, 20..25, 25..30,
# 30..35, >= 35), so ages 20, 25, 30 and 35 were counted in two brackets.
# Each result is now assigned to exactly one bracket. The outer labels are
# honoured literally (20 goes to "Age<=20", 35 to "Age>=35"), and the inner
# boundary ages 25 and 30 go to the lower bracket.
# The label strings are unchanged because the fill colours are mapped to them.
fastest_time <- runners_data |>
  filter(Event == "100 m")

# Plain mean, as before: a missing Age would make this NA.
mean_100m <- mean(fastest_time$Age)

temp_list_1 <- c("Age<=20","20<Age<25","25<Age<30","30<Age<35","Age>=35")

# Rows with a missing Age are left out of the bracket counts.
known_ages <- fastest_time$Age[!is.na(fastest_time$Age)]
age_bracket <- case_when(
  known_ages <= 20 ~ temp_list_1[1],
  known_ages <= 25 ~ temp_list_1[2],
  known_ages <= 30 ~ temp_list_1[3],
  known_ages < 35 ~ temp_list_1[4],
  TRUE ~ temp_list_1[5]
)

# Explicit factor levels keep empty brackets (as 0) and the bracket order.
temp_list <- as.integer(table(factor(age_bracket, levels = temp_list_1)))

fastest_time <- data.frame(Age = temp_list_1, Total = temp_list)

ggplot(data = fastest_time, mapping = aes(x = Age, fill = Age)) +
  geom_bar(mapping = aes(y = Total), stat = "identity") +
  scale_fill_manual(values = c("#56A3A6", "#4F6D7A", "gray", "gray", "gray")) +
  theme_wsj(color = "gray") +
  labs(title = "Idade média entre os corredores de 100m") +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
  )
In [ ]:
mean_100m
26.4383940932164
In [ ]:
# Age profile of the marathon results: mean age plus counts per age bracket.
#
# The previous version used overlapping bounds (<= 20, 20..25, 25..30,
# 30..35, >= 35), so ages 20, 25, 30 and 35 were counted in two brackets.
# Each result is now assigned to exactly one bracket. The outer labels are
# honoured literally (20 goes to "Age<=20", 35 to "Age>=35"), and the inner
# boundary ages 25 and 30 go to the lower bracket.
# The label strings are unchanged because the fill colours are mapped to them.
#
# na.omit() drops every row that has a missing value in any column (as
# before), so the mean below is computed on the same rows as previously.
fastest_time <- runners_data |>
  filter(Event == "Marathon") |>
  na.omit()

mean_marathon <- mean(fastest_time$Age)

temp_list_1 <- c("Age<=20","20<Age<25","25<Age<30","30<Age<35","Age>=35")

marathon_ages <- fastest_time$Age
age_bracket <- case_when(
  marathon_ages <= 20 ~ temp_list_1[1],
  marathon_ages <= 25 ~ temp_list_1[2],
  marathon_ages <= 30 ~ temp_list_1[3],
  marathon_ages < 35 ~ temp_list_1[4],
  TRUE ~ temp_list_1[5]
)

# Explicit factor levels keep empty brackets (as 0) and the bracket order.
temp_list <- as.integer(table(factor(age_bracket, levels = temp_list_1)))

fastest_time <- data.frame(Age = temp_list_1, Total = temp_list)

ggplot(data = fastest_time, mapping = aes(x = Age, fill = Age)) +
  geom_bar(mapping = aes(y = Total), stat = "identity") +
  scale_fill_manual(values = c("gray", "#4F6D7A", "#56A3A6", "gray", "gray")) +
  theme_wsj(color = "gray") +
  labs(title = "Idade média entre os corredores das maratonas") +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
  )
In [ ]:
mean_marathon
28.3914666666667

Corredores com a maior quantidade de medalhas¶

In [ ]:
# Ten men with the most top-three finishes (the table stays grouped by Name).
most_medals_men <- runners_men |>
  filter(Place %in% 1:3) |>
  group_by(Name) |>
  count(Name, sort = TRUE) |>
  head(10)

head(most_medals_men)
# Names of those ten runners, kept as a list for later %in% filters.
most_medals_men_list <- as.list(most_medals_men$Name)
A grouped_df: 6 x 2
Namen
<chr><int>
Michael Johnson 122
Usain Bolt 107
Asafa Powell 106
LaShawn Merritt 97
Frank Fredericks 81
Justin Gatlin 78
In [ ]:
# Yearly top-three finishes for each of the selected top men
# (result grouped by Year, largest counts first).
medals_over_time_fastest_men <- runners_men |>
  filter(Name %in% most_medals_men_list, Place %in% 1:3) |>
  group_by(Year) |>
  count(Name, sort = TRUE)

# One dashed line per runner.
ggplot(
  data = medals_over_time_fastest_men,
  mapping = aes(x = Year, y = n, color = Name)
) +
  geom_line(linetype = "dashed") +
  labs(title = "Os corredores masculinos com a maior quantidade de medalhas.") +
  theme_excel_new() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold")
  )
Warning message:
“Removed 1 row containing missing values (`geom_line()`).”
In [ ]:
# Yearly top-three finishes for each of the selected top men
# (result grouped by Year, largest counts first).
medals_over_time_fastest_men <- runners_men |>
  filter(Name %in% most_medals_men_list) |>
  group_by(Year) |>
  filter(Place == 1 | Place == 2 | Place == 3) |>
  count(Name, sort = TRUE)

# All runners drawn faintly, with Usain Bolt and Michael Johnson redrawn as
# thick lines and pointed at by labelled arrows.
# Changes from the previous version:
#   * line thickness uses `linewidth` (`size` for lines is deprecated since
#     ggplot2 3.4.0),
#   * the arrows are drawn with annotate("segment", ...) so each is drawn
#     once, instead of geom_segment(aes(<constants>)), which repeats the
#     same segment for every row of the data.
ggplot(
  data = medals_over_time_fastest_men,
  mapping = aes(x = Year, y = n, color = Name)
) +
  geom_line(alpha = 0.3) +
  geom_line(
    data = subset(medals_over_time_fastest_men, Name == "Usain Bolt"),
    linewidth = 1
  ) +
  geom_line(
    data = subset(medals_over_time_fastest_men, Name == "Michael Johnson"),
    linewidth = 1
  ) +
  theme_excel_new() +
  annotate("text", x = 1992, y = 16.5, label = "Michael Johnson", size = 5) +
  annotate(
    "segment",
    x = 1992, y = 16, xend = 1995, yend = 15,
    arrow = arrow(length = unit(0.3, "cm")),
    color = "#2E4F4F",
    linewidth = 0.5
  ) +
  annotate("text", x = 2010, y = 16.5, label = "Usain Bolt", size = 5) +
  annotate(
    "segment",
    x = 2010, y = 16, xend = 2008.4, yend = 15,
    arrow = arrow(length = unit(0.3, "cm")),
    color = "#2E4F4F",
    linewidth = 0.5
  ) +
  labs(
    title = "Os dois corredores masculinos com a maior quantidade de medalhas."
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
    legend.position = "bottom"
  )
Warning message:
“Removed 1 row containing missing values (`geom_line()`).”
In [ ]:
# Ten women with the most top-three finishes (the table stays grouped by Name).
most_medals_women <- runners_women |>
  filter(Place %in% 1:3) |>
  group_by(Name) |>
  count(Name, sort = TRUE) |>
  head(10)

head(most_medals_women)
# Names of those ten runners, kept as a list for later %in% filters.
most_medals_women_list <- as.list(most_medals_women$Name)
A grouped_df: 6 x 2
Namen
<chr><int>
Merlene Ottey 174
Gwen Torrence 95
Veronica Campbell-Brown 84
Sanya Richards-Ross 83
Maria Mutola 80
Allyson Felix 79
In [ ]:
# Yearly top-three finishes for each of the selected top women
# (result grouped by Year, largest counts first).
medals_over_time_fastest_women <- runners_women |>
  filter(Name %in% most_medals_women_list, Place %in% 1:3) |>
  group_by(Year) |>
  count(Name, sort = TRUE)

# One dashed line per runner.
ggplot(
  data = medals_over_time_fastest_women,
  mapping = aes(x = Year, y = n, color = Name)
) +
  geom_line(linetype = "dashed") +
  labs(title = "As corredoras femininas com a maior quantidade de medalhas.") +
  theme_excel_new() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold")
  )
In [ ]:
# All selected women drawn faintly, with Merlene Ottey redrawn as a thick
# line and pointed at by a labelled arrow.
# Changes from the previous version:
#   * line thickness uses `linewidth` (`size` for lines is deprecated since
#     ggplot2 3.4.0),
#   * the arrow is drawn with annotate("segment", ...) so it is drawn once,
#     instead of geom_segment(aes(<constants>)), which repeats the same
#     segment for every row of the data.
# NOTE(review): the title mentions two runners but only one is highlighted;
# confirm whether a second highlight was intended.
ggplot(
  data = medals_over_time_fastest_women,
  mapping = aes(x = Year, y = n, color = Name)
) +
  geom_line(alpha = 0.3) +
  geom_line(
    data = subset(medals_over_time_fastest_women, Name == "Merlene Ottey"),
    linewidth = 1
  ) +
  theme_excel_new() +
  annotate("text", x = 1988, y = 20.5, label = "Merlene Ottey", size = 5) +
  annotate(
    "segment",
    x = 1988, y = 20, xend = 1990, yend = 18,
    arrow = arrow(length = unit(0.3, "cm")),
    color = "#2E4F4F",
    linewidth = 0.5
  ) +
  labs(
    title = "As duas corredoras femininas com a maior quantidade de medalhas."
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
    legend.position = "bottom"
  )

Melhores competidores dos 100 metros¶

In [ ]:
# Men's 100 m: number of top-three finishes per runner, most first.
runners_men_100m <- runners_men |>
  filter(Event == "100 m", Place %in% 1:3) |>
  count(Name, sort = TRUE)

# The five leading runners and their names (as a list for %in% filters).
top_100m_men <- runners_men_100m |> head(5)
top_100m_men_list <- as.list(top_100m_men[["Name"]])
In [ ]:
# Per-runner tally for men (years up to and including 2016): number of rows
# with Place 1, 2 and 3, plus their sum.
performance_runners_men <- runners_men |>
  filter(Year <= 2016) |>
  group_by(Name) |>
  summarise(
    Gold_medals = sum(Place == 1, na.rm = TRUE),
    Silver_medals = sum(Place == 2, na.rm = TRUE),
    Bronze_medals = sum(Place == 3, na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
  )
In [ ]:
head(performance_runners_men)
top_100m_men_list
A tibble: 6 x 5
NameGold_medalsSilver_medalsBronze_medalsTotal
<chr><int><int><int><int>
Aaron Armstrong 1001
Aaron Brown 4206
Aaron Ernest 1001
Abadi Hadis 0011
Abayneh Ayele 0000
Abdalelah Haroun4105
  1. 'Asafa Powell'
  2. 'Justin Gatlin'
  3. 'Maurice Greene'
  4. 'Usain Bolt'
  5. 'Tyson Gay'
In [ ]:
# Keep the selected 100 m runners, then reshape to one row per runner and
# medal type (`Medal` holds the former column name, `Count` its value).
performance_runners_men <- performance_runners_men |>
  filter(Name %in% top_100m_men_list) |>
  pivot_longer(
    cols = c(Gold_medals, Silver_medals, Bronze_medals),
    names_to = "Medal",
    values_to = "Count"
  ) |>
  as.data.frame()
In [ ]:
# Horizontal dodged bars: one bar per medal type for each selected runner.
ggplot(
  data = performance_runners_men,
  mapping = aes(x = reorder(Name, Count), y = Count, fill = Medal)
) +
  geom_col(position = "dodge2") +
  coord_flip() +
  scale_fill_manual(values = color_per_medal) +
  labs(title = "Maiores corredores dos 100 metros") +
  theme_wsj(color = "gray") +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
In [ ]:
# Women's 100 m: number of top-three finishes per runner, most first.
runners_women100m <- runners_women |>
  filter(Event == "100 m", Place %in% 1:3) |>
  count(Name, sort = TRUE)

# The five leading runners and their names (as a list for %in% filters).
top_100m_women <- runners_women100m |> head(5)
top_100m_women_list <- as.list(top_100m_women[["Name"]])
In [ ]:
# Per-runner tally for women (years up to and including 2016): number of rows
# with Place 1, 2 and 3, plus their sum.
performance_runners_women <- runners_women |>
  filter(Year <= 2016) |>
  group_by(Name) |>
  summarise(
    Gold_medals = sum(Place == 1, na.rm = TRUE),
    Silver_medals = sum(Place == 2, na.rm = TRUE),
    Bronze_medals = sum(Place == 3, na.rm = TRUE),
    Total = Gold_medals + Silver_medals + Bronze_medals
  )
In [ ]:
head(performance_runners_women)
top_100m_women_list
A tibble: 6 x 5
NameGold_medalsSilver_medalsBronze_medalsTotal
<chr><int><int><int><int>
Ababel Yeshaneh0101
Abeba Aregawi 5308
Abebe Arigawi 5128
Abebe Tola 0000
Abebech Afework1113
Abebu Gelan 0000
  1. 'Merlene Ottey'
  2. 'Veronica Campbell-Brown'
  3. 'Carmelita Jeter'
  4. 'Gwen Torrence'
  5. 'Marion Jones'
In [ ]:
# Keep the selected 100 m runners, then reshape to one row per runner and
# medal type (`Medal` holds the former column name, `Count` its value).
performance_runners_women <- performance_runners_women |>
  filter(Name %in% top_100m_women_list) |>
  pivot_longer(
    cols = c(Gold_medals, Silver_medals, Bronze_medals),
    names_to = "Medal",
    values_to = "Count"
  ) |>
  as.data.frame()
In [ ]:
# Horizontal dodged bars: one bar per medal type for each selected runner.
ggplot(
  data = performance_runners_women,
  mapping = aes(x = reorder(Name, Count), y = Count, fill = Medal)
) +
  geom_col(position = "dodge2") +
  coord_flip() +
  scale_fill_manual(values = color_per_medal) +
  labs(title = "Maiores corredoras dos 100 metros") +
  theme_wsj(color = "gray") +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5))
In [ ]:
# Men's 100 m results ordered by the textual Time column. The fixed-width
# "HH:MM:SS.ffffff" format makes text order match time order.
fastest_time_100m_men <- runners_men |>
  filter(Event == "100 m") |>
  arrange(Time)
top_50_fastest_time_100m_men <- head(fastest_time_100m_men, 50)

# Scatter of age against time for the 50 fastest results, with a shaded band
# over ages 21 to 28 and name labels on results at or below 9.74 s.
# The colour legend is hidden with guides(color = "none"), because
# `guides(color = FALSE)` is deprecated.
ggplot(
  data = top_50_fastest_time_100m_men,
  mapping = aes(x = Age, y = Time, color = Name)
) +
  geom_point(mapping = aes(group = Name)) +
  annotate(
    geom = "rect", xmin = 21, xmax = 28,
    ymin = 0, ymax = 7, fill = "orange", alpha = 0.2
  ) +
  theme_bw() +
  guides(color = "none") +
  geom_text_repel(
    data = subset(top_50_fastest_time_100m_men, Time <= "00:00:09.740000"),
    mapping = aes(label = Name),
    size = 3.5, vjust = 0.2, hjust = 0.2, fontface = "italic"
  ) +
  labs(
    title = "Os corredores com os tempos mais rápidos na categoria de 100 metros."
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
    legend.position = "bottom"
  )

Melhores competidores das maratonas¶

In [ ]:
# First three rows of the men's marathon top-three tally.
# NOTE(review): count() has no `.keep` argument, so `.keep = Time` acts as a
# second grouping column named ".keep". The tally is therefore per
# (Name, Time) pair, not per runner. Confirm that this is intended.
runners_men |>
  filter(Event == "Marathon", Place %in% 1:3) |>
  count(Name, .keep = Time, sort = TRUE) |>
  head(3)
A data.frame: 3 x 3
Name.keepn
<chr><chr><int>
1Abel Kirui 02:07:382
2Paul Biwott 02:08:172
3Abdelkader El Mouaziz02:07:111
In [ ]:
# All marathon rows for Abel Kirui (which() drops rows where the test is NA).
Abel_Kirui_runner <- runners_data[
  which(runners_data$Name == "Abel Kirui" & runners_data$Event == "Marathon"),
  ,
  drop = FALSE
]
In [ ]:
# First three rows of the women's marathon top-three tally.
# NOTE(review): count() has no `.keep` argument, so `.keep = Time` acts as a
# second grouping column named ".keep". The tally is therefore per
# (Name, Time) pair, not per runner. Confirm that this is intended.
runners_women |>
  filter(Event == "Marathon", Place %in% 1:3) |>
  count(Name, .keep = Time, sort = TRUE) |>
  head(3)
A data.frame: 3 x 3
Name.keepn
<chr><chr><int>
1Aselefech Mergia 02:25:322
2Jelena Prokopcuka02:24:072
3Mare Dibaba 02:19:522
In [ ]:
# Men's marathon results ordered by the textual Time column.
fastest_time_marathon_men <- runners_men |>
  filter(Event == "Marathon") |>
  arrange(Time)
top_50_fastest_time_marathon_men <- head(fastest_time_marathon_men, 50)

# Scatter of age against time for the 50 fastest results, with a shaded band
# over ages 28 to 36 and name labels on results at or below "02:03:58".
# Changes from the previous version:
#   * the y aesthetic is named explicitly instead of passed positionally,
#   * the stray trailing comma in geom_text_repel() was removed (it passes an
#     empty argument through `...`),
#   * the legend is hidden with guides(color = "none"), because
#     `guides(color = FALSE)` is deprecated.
ggplot(
  data = top_50_fastest_time_marathon_men,
  mapping = aes(x = Age, y = Time, color = Name)
) +
  geom_point(mapping = aes(group = Name)) +
  annotate(
    geom = "rect", xmin = 28, xmax = 36,
    ymin = 0, ymax = 14, fill = "orange", alpha = 0.2
  ) +
  theme_bw() +
  guides(color = "none") +
  geom_text_repel(
    data = subset(top_50_fastest_time_marathon_men, Time <= "02:03:58"),
    mapping = aes(label = Name),
    size = 3, vjust = 0.2, hjust = 0.2
  ) +
  labs(
    title = "Os corredores com os tempos mais rápidos na categoria de maratona."
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
    legend.position = "bottom"
  )

Evolução dos corredores¶

In [ ]:
# Narrow the country selection to the three with the most top-three finishes.
performance_country_filtered <- performance_country$Country |>
  head(3) |>
  as.list()

# Top-three finishes per country and year (up to 2016), largest counts first.
# The result stays grouped by Country and Year.
countries_performance_per_year <- runners_data |>
  filter(Place %in% 1:3, Year <= 2016) |>
  group_by(Country, Year) |>
  count(Country, sort = TRUE, name = "total")

top_countries_performance_per_year <- filter(
  countries_performance_per_year,
  Country %in% performance_country_filtered
)
head(top_countries_performance_per_year)
A grouped_df: 6 x 3
CountryYeartotal
<chr><dbl><int>
KEN2011199
KEN2012198
KEN2016190
KEN2014183
KEN2015174
KEN2010168
In [ ]:
top_countries_performance_per_year$Year
  1. 2011
  2. 2012
  3. 2016
  4. 2014
  5. 2015
  6. 2010
  7. 2015
  8. 2008
  9. 2013
  10. 2009
  11. 2007
  12. 1988
  13. 2013
  14. 1996
  15. 2012
  16. 2006
  17. 2016
  18. 2007
  19. 2008
  20. 2012
  21. 2016
  22. 2013
  23. 2015
  24. 2009
  25. 2000
  26. 2010
  27. 2004
  28. 2014
  29. 2005
  30. 2006
  31. 1999
  32. 2014
  33. 2004
  34. 1992
  35. 2000
  36. 2002
  37. 2011
  38. 2003
  39. 2001
  40. 1997
  41. 2005
  42. 1999
  43. 2010
  44. 2011
  45. 1997
  46. 1998
  47. 1991
  48. 2008
  49. 2009
  50. 1993
  51. 1994
  52. 1998
  53. 1996
  54. 1984
  55. 1995
  56. 2004
  57. 2003
  58. 2005
  59. 2007
  60. 1987
  61. 1995
  62. 2002
  63. 2003
  64. 2006
  65. 1986
  66. 1985
  67. 1992
  68. 1990
  69. 1989
  70. 1983
  71. 1993
  72. 2000
  73. 2001
  74. 2001
  75. 1982
  76. 1994
  77. 1989
  78. 1991
  79. 1998
  80. 1999
  81. 1981
  82. 1995
  83. 2002
  84. 1968
  85. 1996
  86. 1997
  87. 1990
  88. 1980
  89. 1979
  90. 1987
  91. 1994
  92. 1993
  93. 1974
  94. 1977
  95. 1984
  96. 1988
  97. 1974
  98. 1976
  99. 1978
  100. 1992
  101. 1972
  102. 1975
  103. 1977
  104. 1975
  105. 1976
  106. 1982
  107. 1986
  108. 1967
  109. 1981
  110. 1982
  111. 1987
  112. 1988
  113. 1989
  114. 1991
  115. 1978
  116. 1979
  117. 1980
  118. 1981
  119. 1983
  120. 1985
  121. 1966
  122. 1969
  123. 1971
  124. 1973
In [ ]:
# Coerce Year to numeric before it is used as the animation's reveal variable.
# NOTE(review): Year comes from lubridate::year(), so this looks like a no-op; confirm.
top_countries_performance_per_year$Year <- as.numeric(top_countries_performance_per_year$Year)
In [ ]:
top_countries_performance_per_year$Year
  1. 2011
  2. 2012
  3. 2016
  4. 2014
  5. 2015
  6. 2010
  7. 2015
  8. 2008
  9. 2013
  10. 2009
  11. 2007
  12. 1988
  13. 2013
  14. 1996
  15. 2012
  16. 2006
  17. 2016
  18. 2007
  19. 2008
  20. 2012
  21. 2016
  22. 2013
  23. 2015
  24. 2009
  25. 2000
  26. 2010
  27. 2004
  28. 2014
  29. 2005
  30. 2006
  31. 1999
  32. 2014
  33. 2004
  34. 1992
  35. 2000
  36. 2002
  37. 2011
  38. 2003
  39. 2001
  40. 1997
  41. 2005
  42. 1999
  43. 2010
  44. 2011
  45. 1997
  46. 1998
  47. 1991
  48. 2008
  49. 2009
  50. 1993
  51. 1994
  52. 1998
  53. 1996
  54. 1984
  55. 1995
  56. 2004
  57. 2003
  58. 2005
  59. 2007
  60. 1987
  61. 1995
  62. 2002
  63. 2003
  64. 2006
  65. 1986
  66. 1985
  67. 1992
  68. 1990
  69. 1989
  70. 1983
  71. 1993
  72. 2000
  73. 2001
  74. 2001
  75. 1982
  76. 1994
  77. 1989
  78. 1991
  79. 1998
  80. 1999
  81. 1981
  82. 1995
  83. 2002
  84. 1968
  85. 1996
  86. 1997
  87. 1990
  88. 1980
  89. 1979
  90. 1987
  91. 1994
  92. 1993
  93. 1974
  94. 1977
  95. 1984
  96. 1988
  97. 1974
  98. 1976
  99. 1978
  100. 1992
  101. 1972
  102. 1975
  103. 1977
  104. 1975
  105. 1976
  106. 1982
  107. 1986
  108. 1967
  109. 1981
  110. 1982
  111. 1987
  112. 1988
  113. 1989
  114. 1991
  115. 1978
  116. 1979
  117. 1980
  118. 1981
  119. 1983
  120. 1985
  121. 1966
  122. 1969
  123. 1971
  124. 1973
In [ ]:
glimpse(top_countries_performance_per_year)
Rows: 124
Columns: 3
Groups: Country, Year [124]
$ Country <chr> "KEN", "KEN", "KEN", "KEN", "KEN", "KEN", "USA", "KEN", "KEN",…
$ Year    <dbl> 2011, 2012, 2016, 2014, 2015, 2010, 2015, 2008, 2013, 2009, 20…
$ total   <int> 199, 198, 190, 183, 174, 168, 166, 143, 143, 131, 123, 118, 11…
In [ ]:
# Make sure Year is numeric before using it as the reveal variable.
top_countries_performance_per_year$Year <- as.numeric(as.character(top_countries_performance_per_year$Year))

# Animated line chart: each country's yearly total is revealed along Year.
# Line thickness uses `linewidth`, because `size` for lines is deprecated
# since ggplot2 3.4.0.
p <- ggplot(
  data = top_countries_performance_per_year,
  mapping = aes(x = Year, y = total, color = Country)
) +
  geom_line(linewidth = 2, alpha = 0.75) +
  transition_reveal(Year) +
  theme_wsj(color = "gray") +
  scale_color_brewer(palette = "Dark2")

animate(p, width = 1000, height = 800, fps = 10)
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
# A tibble: 100 × 7
   format width height colorspace matte filesize density
   <chr>  <int>  <int> <chr>      <lgl>    <int> <chr>  
 1 gif     1000    800 sRGB       TRUE         0 72x72  
 2 gif     1000    800 sRGB       TRUE         0 72x72  
 3 gif     1000    800 sRGB       TRUE         0 72x72  
 4 gif     1000    800 sRGB       TRUE         0 72x72  
 5 gif     1000    800 sRGB       TRUE         0 72x72  
 6 gif     1000    800 sRGB       TRUE         0 72x72  
 7 gif     1000    800 sRGB       TRUE         0 72x72  
 8 gif     1000    800 sRGB       TRUE         0 72x72  
 9 gif     1000    800 sRGB       TRUE         0 72x72  
10 gif     1000    800 sRGB       TRUE         0 72x72  
# ℹ 90 more rows
In [ ]:
# Make sure Year is numeric before using it as the reveal variable.
top_countries_performance_per_year$Year <- as.numeric(top_countries_performance_per_year$Year)

# Animated line chart with a moving point, a dashed guide out to the right
# edge and the country code as a label there. Clipping is turned off and the
# right margin widened so the labels stay visible.
# Changes from the previous version:
#   * line thickness uses `linewidth` (`size` for lines is deprecated since
#     ggplot2 3.4.0),
#   * the legend is hidden with guides(color = "none"), because
#     `guides(color = FALSE)` is deprecated.
p <- ggplot(
  data = top_countries_performance_per_year,
  mapping = aes(x = Year, y = total, group = Country, color = Country)
) +
  geom_line(linewidth = 2, alpha = 0.75) +
  geom_segment(aes(xend = max(Year) + 1, yend = total), linetype = 2, color = 'grey') +
  geom_point(size = 2) +
  geom_text(aes(x = max(Year) + 1, label = Country), hjust = 0) +
  transition_reveal(Year) +
  coord_cartesian(clip = 'off') +
  labs(title = 'Total Performance by Country', y = 'Total') +
  theme_minimal() +
  theme(plot.margin = margin(5.5, 40, 5.5, 5.5)) +
  guides(color = "none")

animate(p, width = 1000, height = 700, fps = 10)
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
# A tibble: 100 × 7
   format width height colorspace matte filesize density
   <chr>  <int>  <int> <chr>      <lgl>    <int> <chr>  
 1 gif     1000    700 sRGB       TRUE         0 72x72  
 2 gif     1000    700 sRGB       TRUE         0 72x72  
 3 gif     1000    700 sRGB       TRUE         0 72x72  
 4 gif     1000    700 sRGB       TRUE         0 72x72  
 5 gif     1000    700 sRGB       TRUE         0 72x72  
 6 gif     1000    700 sRGB       TRUE         0 72x72  
 7 gif     1000    700 sRGB       TRUE         0 72x72  
 8 gif     1000    700 sRGB       TRUE         0 72x72  
 9 gif     1000    700 sRGB       TRUE         0 72x72  
10 gif     1000    700 sRGB       TRUE         0 72x72  
# ℹ 90 more rows
In [ ]:
# Make sure Year is numeric before using it as the reveal variable.
top_countries_performance_per_year$Year <- as.numeric(top_countries_performance_per_year$Year)

# Same labelled animation as the previous cell, rendered with 200 frames.
# Changes from the previous version:
#   * line thickness uses `linewidth` (`size` for lines is deprecated since
#     ggplot2 3.4.0),
#   * the legend is hidden with guides(color = "none"), because
#     `guides(color = FALSE)` is deprecated.
p <- ggplot(
  data = top_countries_performance_per_year,
  mapping = aes(x = Year, y = total, group = Country, color = Country)
) +
  geom_line(linewidth = 2, alpha = 0.75) +
  geom_segment(aes(xend = max(Year) + 1, yend = total), linetype = 2, color = 'grey') +
  geom_point(size = 2) +
  geom_text(aes(x = max(Year) + 1, label = Country), hjust = 0) +
  transition_reveal(Year) +
  coord_cartesian(clip = 'off') +
  labs(title = 'Total Performance by Country', y = 'Total') +
  theme_minimal() +
  theme(plot.margin = margin(5.5, 40, 5.5, 5.5)) +
  guides(color = "none")

animate(p, width = 1000, height = 700, fps = 10, nframes = 200)
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
# A tibble: 200 × 7
   format width height colorspace matte filesize density
   <chr>  <int>  <int> <chr>      <lgl>    <int> <chr>  
 1 gif     1000    700 sRGB       TRUE         0 72x72  
 2 gif     1000    700 sRGB       TRUE         0 72x72  
 3 gif     1000    700 sRGB       TRUE         0 72x72  
 4 gif     1000    700 sRGB       TRUE         0 72x72  
 5 gif     1000    700 sRGB       TRUE         0 72x72  
 6 gif     1000    700 sRGB       TRUE         0 72x72  
 7 gif     1000    700 sRGB       TRUE         0 72x72  
 8 gif     1000    700 sRGB       TRUE         0 72x72  
 9 gif     1000    700 sRGB       TRUE         0 72x72  
10 gif     1000    700 sRGB       TRUE         0 72x72  
# ℹ 190 more rows